### Load packages

as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
library(plyr)
library(survey)
library(ltm)
library(ggplot2)
library(ggpubr)
library(dplyr)

### Load data

data <- read.csv("YouGov.csv")

### Define treatment

data$Treatment2 <- ifelse(data$infotreatment != "control", 1, 0)
data$TreatmentBurden2 <- ifelse(data$infotreatment == "treatment1", 1, 0)
data$TreatmentRestrictive2 <- ifelse(data$infotreatment == "treatment2", 1, 0)
data$Treatment3 <- factor(mapvalues(data$infotreatment, 
                                       from = c("control", "treatment1", "treatment2"), 
                                       to = c("Control", "Burdensome", "Restrictive")), 
                             levels =  c("Control", "Burdensome", "Restrictive"))


## Pre-treatment knowledge

#give 0.5 points for a close response
#people have no idea aunts/uncles or even doctors can't immigrate
data$Visatime_1 <- as.numeric.factor(as.factor(mapvalues(data$knowledge_adultsib, 
                                                         from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                         to = c(0, 0.5, 1, 0.5, 0)))) #3 to 10 years #87%
data$Visatime_2 <- as.numeric.factor(as.factor(mapvalues(data$knowledge_doctor, 
                                                         from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                         to = c(0, 0, 0, 0.5, 1)))) #Not eligible #62%
data$Visatime_3 <- as.numeric.factor(as.factor(mapvalues(data$knowledge_athlete, 
                                                         from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                         to = c(1, 0.5, 0, 0, 0)))) # Less than 1 year #10%
data$Visatime_4 <- as.numeric.factor(as.factor(mapvalues(data$knowledge_nanny, 
                                                         from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                         to = c(0.5, 1, 0.5, 0, 0)))) # One to 3 years #5%
data$Visatime_5 <- as.numeric.factor(as.factor(mapvalues(data$knowledge_aunt_unc, 
                                                         from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                         to = c(0, 0, 0, 0.5, 1)))) # Not eligible #60%
data$visa.know.close <- (data$Visatime_1 + data$Visatime_2 + data$Visatime_3 + data$Visatime_4 + data$Visatime_5)/5
#40% correct or almost correct > 20% correct by chance

#precise scores
data$Visatime_1b <- as.numeric.factor(as.factor(mapvalues(data$knowledge_adultsib, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(0, 0, 1, 0, 0)))) #3 to 10 years #20%
data$Visatime_2b <- as.numeric.factor(as.factor(mapvalues(data$knowledge_doctor, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(0, 0, 0, 0, 1)))) #Not eligible #9%
data$Visatime_3b <- as.numeric.factor(as.factor(mapvalues(data$knowledge_athlete, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(1, 0, 0, 0, 0)))) # Less than 1 year #48%
data$Visatime_4b <- as.numeric.factor(as.factor(mapvalues(data$knowledge_nanny, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(0, 1, 0, 0, 0)))) # One to 3 years #40%
data$Visatime_5b <- as.numeric.factor(as.factor(mapvalues(data$knowledge_aunt_unc, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(0, 0, 0, 0, 1)))) # Not eligible #9%
data$visa.know.precise <- (data$Visatime_1b + data$Visatime_2b + data$Visatime_3b + data$Visatime_4b + data$Visatime_5b)/5
#25% correct is slightly better than 20% correct by chance

#give 1 point for underestimates
data$Visatime_1c <- as.numeric.factor(as.factor(mapvalues(data$knowledge_adultsib, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(1, 1, 0, 0, 0)))) #3 to 10 years #87%
data$Visatime_2c <- as.numeric.factor(as.factor(mapvalues(data$knowledge_doctor, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(1, 1, 1, 1, 0)))) #Not eligible #9%
data$Visatime_4c <- as.numeric.factor(as.factor(mapvalues(data$knowledge_nanny, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(1, 0, 0, 0, 0)))) # One to 3 years #40%
data$Visatime_5c <- as.numeric.factor(as.factor(mapvalues(data$knowledge_aunt_unc, 
                                                          from = c("Less than 1 year", "1 to 3 years", "3 to 10 years", "More than 10 years", "Not eligible"), 
                                                          to = c(1, 1, 1, 1, 0)))) # Not eligible #9%
data$visa.know.underestimation <- (data$Visatime_1c + data$Visatime_2c + data$Visatime_4c + data$Visatime_5c)/4
#25% correct is slightly better than 20% correct by chance

### Define other (pre-treatment) covariates

data$Ideology3 <- as.factor(mapvalues(data$ideo5, from = c("Conservative", "Liberal", "Moderate", "Not sure", "Very conservative", "Very liberal"), 
                                                to = c("Conservative", "Liberal", "Moderate", "Moderate", "Conservative", "Liberal")))
data$Ideology2 <- data$Ideology3
data$Ideology2[data$Ideology3 == "Moderate"] <- NA
data$Ideology2 <- factor(data$Ideology2, levels=c("Liberal", "Conservative"))

data$Partyid3 <- as.factor(mapvalues(data$pid3, from = c("Democrat", "Independent", "Republican", "Not sure", "Other"), 
                                     to = c("Democrat", "Independent", "Republican", "Independent", "Independent")))
data$Partyid2 <- data$Partyid3
data$Partyid2[data$Partyid3 == "Independent"] <- NA
data$Partyid2 <- factor(data$Partyid2, levels=c("Democrat", "Republican"))

#Other covariates of interest: 

data$Female2 <- ifelse(data$gender == "Female", 1, 0)
data$Old2 <- ifelse(2023-data$birthyr >= 40, 1, 0) # Median US age
data$White2 <- ifelse(data$race == "White", 1, 0)
data$Hispanic2 <- ifelse(data$hispanic == "Yes", 1, 0)
data$WhiteNonHispanic2 <- ifelse(data$White2 == 1 & data$Hispanic2 == 0, 1, 0)
data$Spanish2 <- ifelse(data$speakspanish == "I can not speak Spanish" | data$speakspanish == "", 0, 1)
data$Urban2 <- ifelse(data$urbanicity2 == "Big city", 1, 0)
data$College2 <- ifelse(data$educ == "Post-grad" | data$educ == "4-year", 1, 0)
data$Rich2 <- ifelse(data$faminc_new == "500,000 or more" | data$faminc_new == "$350,000 - $499,999" | data$faminc_new == "$250,000 - $349,999" |
                     data$faminc_new == "$200,000 - $249,999" | data$faminc_new == "$150,000 - $199,999" | data$faminc_new == "$120,000 - $149,999" |
                     data$faminc_new == "$100,000 - $119,999", 1, 0)

### Define outcomes

## Preferences
data$ProImmPref1 <- as.numeric.factor(as.factor(mapvalues(data$preference1, 
                                                          from = c("Much harder", "Harder", "Neither harder nor easier", "Easier", "Much easier"), 
                                                          to = c(0, 1/4, 2/4, 3/4, 1))))
data$ProImmPref2 <- as.numeric.factor(as.factor(mapvalues(data$preference2, 
                                                          from = c("Decreased a lot", "Decreased a little", "Neither increased nor decreased", "Increased a little", "Increased a lot"), 
                                                          to = c(0, 1/4, 2/4, 3/4, 1))))
data$ProImmPrefIndex <- (data$ProImmPref1 + data$ProImmPref2)/2
cronbach.alpha(data[c("ProImmPref1", "ProImmPref2")]) #0.761

data$ProImm2 <- ifelse(data$ProImmPrefIndex > 0.5, 1, 0) #46% pro-immigration
data$AntiImm2 <- ifelse(data$ProImmPrefIndex < 0.5, 1, 0) #30% pro-immigration
## Manipulation checks

data$ImmBurden <- as.numeric.factor(as.factor(mapvalues(data$belief1, 
                                                        from = c("Not burdensome at all", "Not very burdensome", "A little burdensome", "Very burdensome"), 
                                                        to = c(0/3, 1/3, 2/3, 3/3))))
data$ImmRestrictiveness <- as.numeric.factor(as.factor(mapvalues(data$belief2, 
                                                                 from = c("Very low", "A little low", "A little high", "Very high"), 
                                                                 to = c(3/3, 2/3, 1/3, 0/3))))
data$ImmDifficultyIndex <- (data$ImmBurden + data$ImmRestrictiveness)/2

### Save updated data

save(data, file = "YouGov.Rdata")

### Load updated data

load("YouGov.Rdata")

### Table 1: Exploratory analysis of subgroup differences of pre-treatment knowledge

## Survey package

dataS <- svydesign(data = data, ids = ~1, strata = NULL, weights = ~weight)

# create a list of groups of interest
groups.of.interest <- c ("Female2", "Old2", "WhiteNonHispanic2", "Spanish2", "College2", "Rich2", "Partyid2", "Ideology2")
group.means <- aggregate(visa.know.precise ~ Female2 + Old2 + WhiteNonHispanic2 + Spanish2 + College2 + Rich2 + Ideology2, dataS$variables, mean)

Female2.ttest <- svyttest(visa.know.precise ~ Female2, dataS)
Old2.ttest <- svyttest(visa.know.precise ~ Old2, dataS)
WhiteNonHispanic2.ttest <- svyttest(visa.know.precise ~ WhiteNonHispanic2, dataS)
Spanish2.ttest <- svyttest(visa.know.precise ~ Spanish2, dataS)
College2.ttest <- svyttest(visa.know.precise ~ College2, dataS)
Rich2.ttest <- svyttest(visa.know.precise ~ Rich2, dataS)
Partyid2.ttest <- svyttest(visa.know.precise ~ Partyid2, dataS)
Ideology2.ttest <- svyttest(visa.know.precise ~ Ideology2, dataS)

saved.ttests <- list(Female2.ttest, Old2.ttest, WhiteNonHispanic2.ttest, Spanish2.ttest, College2.ttest, Rich2.ttest, Partyid2.ttest, Ideology2.ttest)

Female2.ttest2 <- svyttest(visa.know.close ~ Female2, dataS)
Old2.ttest2 <- svyttest(visa.know.close ~ Old2, dataS)
WhiteNonHispanic2.ttest2 <- svyttest(visa.know.close ~ WhiteNonHispanic2, dataS)
Spanish2.ttest2 <- svyttest(visa.know.close ~ Spanish2, dataS)
College2.ttest2 <- svyttest(visa.know.close ~ College2, dataS)
Rich2.ttest2 <- svyttest(visa.know.close ~ Rich2, dataS)
Partyid2.ttest2 <- svyttest(visa.know.close ~ Partyid2, dataS)
Ideology2.ttest2 <- svyttest(visa.know.close ~ Ideology2, dataS)

saved.ttests2 <- list(Female2.ttest2, Old2.ttest2, WhiteNonHispanic2.ttest2, Spanish2.ttest2, College2.ttest2, Rich2.ttest2, Partyid2.ttest2, Ideology2.ttest2)

Female2.ttest3 <- svyttest(Visatime_5b ~ Female2, dataS)
Old2.ttest3 <- svyttest(Visatime_5b ~ Old2, dataS)
WhiteNonHispanic2.ttest3 <- svyttest(Visatime_5b ~ WhiteNonHispanic2, dataS)
Spanish2.ttest3 <- svyttest(Visatime_5b ~ Spanish2, dataS)
College2.ttest3 <- svyttest(Visatime_5b ~ College2, dataS)
Rich2.ttest3 <- svyttest(Visatime_5b ~ Rich2, dataS)
Partyid2.ttest3 <- svyttest(Visatime_5b ~ Partyid2, dataS)
Ideology2.ttest3 <- svyttest(Visatime_5b ~ Ideology2, dataS)

saved.ttests3 <- list(Female2.ttest3, Old2.ttest3, WhiteNonHispanic2.ttest3, Spanish2.ttest3, College2.ttest3, Rich2.ttest3, Partyid2.ttest3, Ideology2.ttest3)

# extract the relevant information from each result and store it in a data frame
table <- data.frame(
  group.of.interest = groups.of.interest,
#  difference.aunt = sapply(saved.ttests, function (x) round(x$estimate[1], 3)), # overall mean 9%
  ci.difference.aunt = sapply(saved.ttests3, function(x) paste("(", round(x$conf.int[1], 2), ",", round(x$conf.int[2], 2), ")", sep = "")),
  p.value.aunt = sapply(saved.ttests3, function(x) round(x$p.value, 3)), # overall mean25%
#  difference.precise = sapply(saved.ttests, function (x) round(x$estimate[1], 3)),
  ci.difference.precise = sapply(saved.ttests, function(x) paste("(", round(x$conf.int[1], 2), ",", round(x$conf.int[2], 2), ")", sep = "")),
  p.value.precise = sapply(saved.ttests, function(x) round(x$p.value, 3)),
#  difference.approximate = sapply(saved.ttests2, function (x) round(x$estimate[1], 3)), #overall mean 40%
  ci.difference.approx = sapply(saved.ttests2, function(x) paste ("(", round (x$conf.int[1], 2), ",", round(x$conf.int[2], 2), ")", sep = "")),
p.value.aprox = sapply(saved.ttests2, function(x) round(x$p.value, 3))
)
table[nrow(table) + 1,] <- c("general.population", "(0.065, 0.1)", "", "(0.24, 0.27)", "", "(0.38, 0.41)", "")
rownames(table) <- NULL 

# load xtable package
library(xtable)
# create a latex table from the data frame
table <- xtable(table)
# print the table
print(table)

### Updated Figure 1

# Function to calculate weighted means and confidence intervals
calculate_stats <- function(data, outcome, group, weight) {
  stats <- data %>%
    group_by(!!sym(group)) %>%
    summarise(
      mean = weighted.mean(!!sym(outcome), w = !!sym(weight), na.rm = TRUE),
      variance = sum(!!sym(weight) * (!!sym(outcome) - mean)^2, na.rm = TRUE) / sum(!!sym(weight), na.rm = TRUE),
      n = n(),
      .groups = 'drop'
    ) %>%
    mutate(
      se = sqrt(variance / n),
      lower95 = mean - qt(0.975, n - 1) * se,
      upper95 = mean + qt(0.975, n - 1) * se
    )
  return(stats)
}

# Calculate weighted statistics for Treatment3 (Control, Burdensome, Restrictive)
stats_treatment3_difficulty <- calculate_stats(data, "ImmDifficultyIndex", "Treatment3", "weight")
stats_treatment3_preference <- calculate_stats(data, "ProImmPrefIndex", "Treatment3", "weight")

# Calculate weighted statistics for Treatment2 (0 = Placebo Control, 1 = Combined Treatment)
stats_treatment2_combined <- data %>%
  filter(Treatment2 == 1) %>%
  mutate(Treatment3 = "Combined Treatment")

stats_treatment2_difficulty <- calculate_stats(stats_treatment2_combined, "ImmDifficultyIndex", "Treatment3", "weight")
stats_treatment2_preference <- calculate_stats(stats_treatment2_combined, "ProImmPrefIndex", "Treatment3", "weight")

# Combine the stats from Treatment3 and the "Combined Treatment" from Treatment2
stats_difficulty <- rbind(
  stats_treatment3_difficulty,
  stats_treatment2_difficulty
)

stats_preference <- rbind(
  stats_treatment3_preference,
  stats_treatment2_preference
)

# Rename for plotting purposes without touching the original data
stats_difficulty$Treatment3 <- factor(stats_difficulty$Treatment3, 
                                      levels = c("Control", "Burdensome", 
                                                 "Restrictive", "Combined Treatment"))

stats_preference$Treatment3 <- factor(stats_preference$Treatment3, 
                                      levels = c("Control", "Burdensome", 
                                                 "Restrictive", "Combined Treatment"))

# Re-level Treatment3 so that "Placebo Control" is the reference level for comparisons in Treatment2
data <- data %>%
  mutate(Treatment2 = factor(Treatment2, levels = c(0, 1), labels = c("Placebo Control", "Combined Treatment")))

# Calculate the linear models for difference-in-means
model_difficulty <- lm(ImmDifficultyIndex ~ Treatment3, data = data, weights = weight)
model_preference <- lm(ProImmPrefIndex ~ Treatment3, data = data, weights = weight)

# Extract the difference-in-means (d) and p-values for difficulty (Treatment3 for Burdensome and Restrictive)
coef_difficulty_burdensome <- summary(model_difficulty)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_difficulty_restrictive <- summary(model_difficulty)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the combined treatment from Treatment2
model_combined_difficulty <- lm(ImmDifficultyIndex ~ Treatment2, data = data, weights = weight)
coef_difficulty_combined <- summary(model_combined_difficulty)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

# Create labels for difficulty (d and p-value), ensuring correct formatting for p-values
label_difficulty_burdensome <- sprintf("d=%.3f, p=%s", 
                                       coef_difficulty_burdensome[1], 
                                       format.pval(coef_difficulty_burdensome[2], eps = 0.01, digits = 2))

label_difficulty_restrictive <- sprintf("d=%.3f, p=%s", 
                                        coef_difficulty_restrictive[1], 
                                        format.pval(coef_difficulty_restrictive[2], eps = 0.001, digits = 2))

label_difficulty_combined <- sprintf("d=%.3f, p=%s (pre-reg. spec.)", 
                                     coef_difficulty_combined[1], 
                                     format.pval(coef_difficulty_combined[2], eps = 0.001, digits = 2))

# Repeat the same steps for preference index
coef_preference_burdensome <- summary(model_preference)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_preference_restrictive <- summary(model_preference)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the combined treatment from Treatment2
model_combined_preference <- lm(ProImmPrefIndex ~ Treatment2, data = data, weights = weight)
coef_preference_combined <- summary(model_combined_preference)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

# Create labels for preference (d and p-value), ensuring correct formatting for p-values
label_preference_burdensome <- sprintf("d=%.3f, p=%s", 
                                       coef_preference_burdensome[1], 
                                       format.pval(coef_preference_burdensome[2], eps = 0.001, digits = 2))

label_preference_restrictive <- sprintf("d=%.3f, p=%s", 
                                        coef_preference_restrictive[1], 
                                        format.pval(coef_preference_restrictive[2], eps = 0.01, digits = 2))

label_preference_combined <- sprintf("d=%.3f, p=%s (pre-reg. spec.)", 
                                     coef_preference_combined[1], 
                                     format.pval(coef_preference_combined[2], eps = 0.01, digits = 2))

# Plot for ImmDifficultyIndex with all treatments and brackets, renaming levels in the plot
p1 <- ggplot(stats_difficulty, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Believe Immigration is Difficult (Index)\n") + ylim(0.35, 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +  # Renaming for plot
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.8, label = label_difficulty_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.85, label = label_difficulty_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.9, label = label_difficulty_combined, label.size = 4
  )

# Plot for ProImmPrefIndex with all treatments and brackets, renaming levels in the plot
p2 <- ggplot(stats_preference, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Have Pro-immigration Preferences (Index)\n") + ylim(0.35, 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +  # Renaming for plot
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.8, label = label_preference_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.85, label = label_preference_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.9, label = label_preference_combined, label.size = 4
  )

# Combine the plots
combined_figure <- gridExtra::grid.arrange(p1, p2, ncol = 2)

# Save the combined figure
ggsave("Figure1_upd2.pdf", plot = combined_figure, width = 12, height = 5.5, dpi = 300)

#####################
#### APPENDIX
#####################

####

### Load updated data

load("YouGov.Rdata")

### Figure A1 Extra figure on separate treatment effects #Add survey weights (Figure A1)

# Function to calculate weighted means and confidence intervals
calculate_stats <- function(data, outcome, group, weight) {
  stats <- data %>%
    group_by(!!sym(group)) %>%
    summarise(
      mean = weighted.mean(!!sym(outcome), w = !!sym(weight), na.rm = TRUE),
      variance = sum(!!sym(weight) * (!!sym(outcome) - mean)^2, na.rm = TRUE) / sum(!!sym(weight), na.rm = TRUE),
      n = n(),
      .groups = 'drop'
    ) %>%
    mutate(
      se = sqrt(variance / n),
      lower95 = mean - qt(0.975, n - 1) * se,
      upper95 = mean + qt(0.975, n - 1) * se
    )
  return(stats)
}

# Calculate weighted statistics for Treatment3 (Control, Burdensome, Restrictive)
stats_treatment3_restrictiveness <- calculate_stats(data, "ImmRestrictiveness", "Treatment3", "weight")
stats_treatment3_burden <- calculate_stats(data, "ImmBurden", "Treatment3", "weight")
stats_treatment3_pref1 <- calculate_stats(data, "ProImmPref1", "Treatment3", "weight")
stats_treatment3_pref2 <- calculate_stats(data, "ProImmPref2", "Treatment3", "weight")

# Calculate weighted statistics for Treatment2 (0 = Placebo Control, 1 = Combined Treatment)
stats_treatment2_combined <- data %>%
  filter(Treatment2 == 1) %>%
  mutate(Treatment3 = "Combined Treatment")

stats_treatment2_restrictiveness <- calculate_stats(stats_treatment2_combined, "ImmRestrictiveness", "Treatment3", "weight")
stats_treatment2_burden <- calculate_stats(stats_treatment2_combined, "ImmBurden", "Treatment3", "weight")
stats_treatment2_pref1 <- calculate_stats(stats_treatment2_combined, "ProImmPref1", "Treatment3", "weight")
stats_treatment2_pref2 <- calculate_stats(stats_treatment2_combined, "ProImmPref2", "Treatment3", "weight")

# Combine the stats from Treatment3 and the "Combined Treatment" from Treatment2
stats_restrictiveness <- rbind(
  stats_treatment3_restrictiveness,
  stats_treatment2_restrictiveness
)

stats_burden <- rbind(
  stats_treatment3_burden,
  stats_treatment2_burden
)

stats_pref1 <- rbind(
  stats_treatment3_pref1,
  stats_treatment2_pref1
)

stats_pref2 <- rbind(
  stats_treatment3_pref2,
  stats_treatment2_pref2
)

# Rename for plotting purposes without touching the original data
stats_restrictiveness$Treatment3 <- factor(stats_restrictiveness$Treatment3, 
                                           levels = c("Control", "Burdensome", 
                                                      "Restrictive", "Combined Treatment"))

stats_burden$Treatment3 <- factor(stats_burden$Treatment3, 
                                  levels = c("Control", "Burdensome", 
                                             "Restrictive", "Combined Treatment"))

stats_pref1$Treatment3 <- factor(stats_pref1$Treatment3, 
                                 levels = c("Control", "Burdensome", 
                                            "Restrictive", "Combined Treatment"))

stats_pref2$Treatment3 <- factor(stats_pref2$Treatment3, 
                                 levels = c("Control", "Burdensome", 
                                            "Restrictive", "Combined Treatment"))

# Re-level Treatment3 so that "Placebo Control" is the reference level for comparisons in Treatment2
data <- data %>%
  mutate(Treatment2 = factor(Treatment2, levels = c(0, 1), labels = c("Placebo Control", "Combined Treatment")))

# Calculate the linear models for difference-in-means
model_restrictiveness <- lm(ImmRestrictiveness ~ Treatment3, data = data, weights = weight)
model_burden <- lm(ImmBurden ~ Treatment3, data = data, weights = weight)
model_pref1 <- lm(ProImmPref1 ~ Treatment3, data = data, weights = weight)
model_pref2 <- lm(ProImmPref2 ~ Treatment3, data = data, weights = weight)

# Extract the difference-in-means (d) and p-values for restrictiveness, burden, preference 1, and preference 2
coef_restrictiveness_burdensome <- summary(model_restrictiveness)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_restrictiveness_restrictive <- summary(model_restrictiveness)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

coef_burden_burdensome <- summary(model_burden)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_burden_restrictive <- summary(model_burden)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

coef_pref1_burdensome <- summary(model_pref1)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_pref1_restrictive <- summary(model_pref1)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

coef_pref2_burdensome <- summary(model_pref2)$coefficients["Treatment3Burdensome", c("Estimate", "Pr(>|t|)")]
coef_pref2_restrictive <- summary(model_pref2)$coefficients["Treatment3Restrictive", c("Estimate", "Pr(>|t|)")]

# Extract the difference-in-means (d) and p-values for the combined treatment from Treatment2
model_combined_restrictiveness <- lm(ImmRestrictiveness ~ Treatment2, data = data, weights = weight)
coef_restrictiveness_combined <- summary(model_combined_restrictiveness)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

model_combined_burden <- lm(ImmBurden ~ Treatment2, data = data, weights = weight)
coef_burden_combined <- summary(model_combined_burden)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

model_combined_pref1 <- lm(ProImmPref1 ~ Treatment2, data = data, weights = weight)
coef_pref1_combined <- summary(model_combined_pref1)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

model_combined_pref2 <- lm(ProImmPref2 ~ Treatment2, data = data, weights = weight)
coef_pref2_combined <- summary(model_combined_pref2)$coefficients["Treatment2Combined Treatment", c("Estimate", "Pr(>|t|)")]

# Create labels for restrictiveness, burden, preference 1, and preference 2 (d and p-value), ensuring correct formatting for p-values
label_restrictiveness_burdensome <- sprintf("d=%.3f, p=%s", 
                                            coef_restrictiveness_burdensome[1], 
                                            format.pval(coef_restrictiveness_burdensome[2], eps = 0.01, digits = 2))

label_restrictiveness_restrictive <- sprintf("d=%.3f, p=%s", 
                                             coef_restrictiveness_restrictive[1], 
                                             format.pval(coef_restrictiveness_restrictive[2], eps = 0.01, digits = 2))

label_restrictiveness_combined <- sprintf("d=%.3f, p=%s", 
                                          coef_restrictiveness_combined[1], 
                                          format.pval(coef_restrictiveness_combined[2], eps = 0.01, digits = 2))

label_burden_burdensome <- sprintf("d=%.3f, p=%s", 
                                   coef_burden_burdensome[1], 
                                   format.pval(coef_burden_burdensome[2], eps = 0.01, digits = 2))

label_burden_restrictive <- sprintf("d=%.3f, p=%s", 
                                    coef_burden_restrictive[1], 
                                    format.pval(coef_burden_restrictive[2], eps = 0.01, digits = 2))

label_burden_combined <- sprintf("d=%.3f, p=%s", 
                                 coef_burden_combined[1], 
                                 format.pval(coef_burden_combined[2], eps = 0.01, digits = 2))

label_pref1_burdensome <- sprintf("d=%.3f, p=%s", 
                                  coef_pref1_burdensome[1], 
                                  format.pval(coef_pref1_burdensome[2], eps = 0.01, digits = 2))

label_pref1_restrictive <- sprintf("d=%.3f, p=%s", 
                                   coef_pref1_restrictive[1], 
                                   format.pval(coef_pref1_restrictive[2], eps = 0.01, digits = 2))

label_pref1_combined <- sprintf("d=%.3f, p=%s", 
                                coef_pref1_combined[1], 
                                format.pval(coef_pref1_combined[2], eps = 0.01, digits = 2))

label_pref2_burdensome <- sprintf("d=%.3f, p=%s", 
                                  coef_pref2_burdensome[1], 
                                  format.pval(coef_pref2_burdensome[2], eps = 0.01, digits = 2))

label_pref2_restrictive <- sprintf("d=%.3f, p=%s", 
                                   coef_pref2_restrictive[1], 
                                   format.pval(coef_pref2_restrictive[2], eps = 0.01, digits = 2))

label_pref2_combined <- sprintf("d=%.3f, p=%s", 
                                coef_pref2_combined[1], 
                                format.pval(coef_pref2_combined[2], eps = 0.01, digits = 2))

# Plot for ImmRestrictiveness and ImmBurden with all treatments and brackets
p1 <- ggplot(stats_restrictiveness, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Believe Immigration is Restrictive\n") + ylim(0.35, 0.95) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.825, label = label_restrictiveness_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.875, label = label_restrictiveness_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.925, label = label_restrictiveness_combined, label.size = 4
  )

p2 <- ggplot(stats_burden, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Believe Immigration is Burdensome\n") + ylim(0.35, 0.95) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.825, label = label_burden_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.875, label = label_burden_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.925, label = label_burden_combined, label.size = 4
  )

# Plot for ProImmPref1 and ProImmPref2 with all treatments and brackets
p3 <- ggplot(stats_pref1, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Prefer Increasing Immigration\n") + ylim(0.35, 0.95) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.825, label = label_pref1_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.875, label = label_pref1_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.925, label = label_pref1_combined, label.size = 4
  )

p4 <- ggplot(stats_pref2, aes(x = Treatment3, y = mean)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower95, ymax = upper95), width = 0.15) +
  ylab("Prefer Easier Immigration\n") + ylim(0.35, 0.95) +
  theme_minimal() +
  theme(text = element_text(size = 18),
        axis.title.x = element_blank()) +
  scale_x_discrete(labels = c("Control" = "Placebo\nControl", 
                              "Burdensome" = "Burdensome\nTreatment", 
                              "Restrictive" = "Restrictive\nTreatment", 
                              "Combined Treatment" = "All Treatments\nCombined")) +
  geom_bracket(
    xmin = "Control", xmax = "Burdensome", 
    y.position = 0.825, label = label_pref2_burdensome, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Restrictive", 
    y.position = 0.875, label = label_pref2_restrictive, label.size = 4
  ) +
  geom_bracket(
    xmin = "Control", xmax = "Combined Treatment", 
    y.position = 0.925, label = label_pref2_combined, label.size = 4
  )

# Combine the plots into a 2x2 grid
combined_appendix_figure <- gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)

# Save the combined appendix figure
ggsave("FigureA1_upd.pdf", plot = combined_appendix_figure, width = 12, height = 11, dpi = 300)

### Table A1 upd

# Linear models for difference-in-means
model_difficulty <- lm(ImmDifficultyIndex ~ Treatment2, data = data, weights=weight)
model_preference <- lm(ProImmPrefIndex ~ Treatment2, data = data, weights=weight)
model_difficulty3 <- lm(ImmDifficultyIndex ~ Treatment3, data = data, weights=weight)
model_preference3 <- lm(ProImmPrefIndex ~ Treatment3, data = data, weights=weight)
model_difficulty_controls <- lm(ImmDifficultyIndex ~ Treatment2 + visa.know.close +
                                  Female2 + Old2 + WhiteNonHispanic2 + Spanish2 + College2 + Rich2 + Partyid3, data = data, weights=weight)
model_preference_controls <- lm(ProImmPrefIndex ~ Treatment2 + visa.know.close +
                                  Female2 + Old2 + WhiteNonHispanic2 + Spanish2 + College2 + Rich2 + Partyid3, data = data, weights=weight)
model_difficulty3_controls <- lm(ImmDifficultyIndex ~ Treatment3 + visa.know.close +
                                  Female2 + Old2 + WhiteNonHispanic2 + Spanish2 + College2 + Rich2 + Partyid3, data = data, weights=weight)
model_preference3_controls <- lm(ProImmPrefIndex ~ Treatment3 + visa.know.close +
                                  Female2 + Old2 + WhiteNonHispanic2 + Spanish2 + College2 + Rich2 + Partyid3, data = data, weights=weight)

## Quick fix for stargazer <= 5.2.3 is.na() issue with long model names in R >= 4.2
# Unload stargazer if loaded
detach("package:stargazer",unload=T)
# Delete it
remove.packages("stargazer")
# Download the source
download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
# Unpack
untar("stargazer_5.2.3.tar.gz")
# Read the sourcefile with .inside.bracket fun
stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
# Move the length check 5 lines up so it precedes is.na(.)
stargazer_src[1990] <- stargazer_src[1995]
stargazer_src[1995] <- ""
# Save back
writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
# Compile and install the patched package
install.packages("stargazer", repos = NULL, type="source")

library(stargazer)

stargazer(model_difficulty, model_difficulty3,
  model_difficulty_controls, model_difficulty3_controls,
  model_preference, model_preference3,
          model_preference_controls, model_preference3_controls,
           type = "latex",
           title = "Table 1. (Positive) Effects of Immigration Policy Information on Beliefs and Preferences",
           dep.var.labels = c("Believe Immigration is Difficult", "Pro-Immigration Preference"),
           covariate.labels = c("Combined Treatment", 
                                "Burdensome Treatment", "Restrictive Treatment",
                                "Policy knowledge\n(visa almost correct)",
                                "Female", "Old (40+)", "White Non-Hisp.", 
                                "Spanish-speaking", "College-educated",
                                "High-income", "Independent", "Republican"),
           omit.stat = c("f", "ser"),
           omit = "Intercept",
           notes = c("Standard errors in parentheses",
                     "$^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001"),
           notes.append = FALSE,
           star.char = c("*", "**", "***"),
           star.cutoffs = c(0.05, 0.01, 0.001),
           header = FALSE,
           float = FALSE,
           font.size = "small",
           no.space = TRUE,
           align = TRUE)

### Heterogeneous effects table (Tables A2 and A3)

data$visa.know.close2 <- ifelse(data$visa.know.close >= 0.4, 1, 0)

library(stargazer)

# List of control variables
controls <- c("visa.know.close2", "Female2", "Old2", "WhiteNonHispanic2", "Spanish2", "College2", "Rich2", "Partyid3")

# Function to create interaction models
create_interaction_models <- function(dv) {
  lapply(controls, function(var) {
    formula <- as.formula(paste(dv, "~ Treatment2 *", var))
    lm(formula, data = data, weights = weight)
  })
}

# Create models for both dependent variables
models_difficulty <- create_interaction_models("ImmDifficultyIndex")
models_preference <- create_interaction_models("ProImmPrefIndex")

# Function to create stargazer table
create_table <- function(models, title) {
  stargazer(models,
            type = "latex",
            title = title,
            column.labels = c("Policy knowledge", "Female", "Old (40+)", "White Non-Hisp.", 
                              "Spanish-speaking", "College-educated", "High-income", "Party ID"),
            dep.var.labels = c("Believe Immigration is Difficult"),
            covariate.labels = c("Treatment", "Subgroup", "Treatment × Subgroup"),
            omit = "Constant",
            omit.stat = c("f", "ser"),
            notes = c("Standard errors in parentheses", "* p<0.05, ** p<0.01, *** p<0.001"),
            notes.append = FALSE,
            star.char = c("*", "**", "***"),
            font.size = "small",
            no.space = TRUE,
            single.row = TRUE,
            align = TRUE)
}

# Generate tables
table_difficulty <- create_table(models_difficulty, 
                                 "Heterogeneous Treatment Effects on Belief that Immigration is Difficult")

table_preference <- create_table(models_preference, 
                                 "Heterogeneous Treatment Effects on Pro-Immigration Preferences")

# Print tables
cat(table_difficulty)
cat("\n\n")
cat(table_preference)

### Heterogeneous effects table by separate treatments (Tables A4 and A5)

# Function to create interaction models with Treatment3 (Burdensome and Restrictive)
create_interaction_models_treatment3 <- function(dv) {
  lapply(controls, function(var) {
    formula <- as.formula(paste(dv, "~ Treatment3 *", var))
    lm(formula, data = data, weights = weight)
  })
}

# Create models for both dependent variables using Treatment3
models_difficulty_treatment3 <- create_interaction_models_treatment3("ImmDifficultyIndex")
models_preference_treatment3 <- create_interaction_models_treatment3("ProImmPrefIndex")

# Function to create stargazer table for Treatment3 subgroup analysis
create_table_treatment3 <- function(models, title) {
  stargazer(models,
            type = "latex",
            title = title,
            column.labels = c("Policy knowledge", "Female", "Old (40+)", "White Non-Hisp.", 
                              "Spanish-speaking", "College-educated", "High-income", "Party ID"),
            dep.var.labels = c("Believe Immigration is Difficult"),
            covariate.labels = c("Burdensome Treatment", "Restrictive Treatment", "Subgroup", 
                                 "Burdensome × Subgroup", "Restrictive × Subgroup"),
            omit = "Constant",
            omit.stat = c("f", "ser"),
            notes = c("Standard errors in parentheses", "* p<0.05, ** p<0.01, *** p<0.001"),
            notes.append = FALSE,
            star.char = c("*", "**", "***"),
            font.size = "small",
            no.space = TRUE,
            single.row = TRUE,
            align = TRUE)
}

# Generate tables for both dependent variables
table_difficulty_treatment3 <- create_table_treatment3(models_difficulty_treatment3, 
                                                       "Heterogeneous Treatment Effects on Belief that Immigration is Difficult (Treatment3)")

table_preference_treatment3 <- create_table_treatment3(models_preference_treatment3, 
                                                       "Heterogeneous Treatment Effects on Pro-Immigration Preferences (Treatment3)")

# Print the LaTeX tables for output
cat(table_difficulty_treatment3)
cat("\n\n")
cat(table_preference_treatment3)